{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "# Install dependencies with %pip so they go into the running kernel's environment.\n",
        "# TODO: pin package versions so the notebook stays reproducible.\n",
        "%pip install langchain faiss-gpu modelscope sentence_transformers"
      ],
      "metadata": {
        "id": "a3bc-AafRKMk"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "\n",
        "import pandas as pd\n",
        "from langchain.document_loaders import DataFrameLoader\n",
        "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
        "from langchain.vectorstores import FAISS"
      ],
      "metadata": {
        "id": "Fh-Z-ie4RHoY"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Download the embedding model from ModelScope.\n",
        "from modelscope import snapshot_download\n",
        "\n",
        "# snapshot_download returns the local directory that holds the model files.\n",
        "model_dir = snapshot_download('Jerry0/m3e-base')\n",
        "\n",
        "# Load the downloaded model as the embedding function, running on the GPU.\n",
        "embeddings = HuggingFaceEmbeddings(\n",
        "    model_name=model_dir,\n",
        "    model_kwargs={'device': 'cuda'},\n",
        ")"
      ],
      "metadata": {
        "id": "XKcIorltRJgF"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Local directory of the model downloaded by modelscope\n",
        "model_dir"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 58
        },
        "id": "EOPnBe2mTufn",
        "outputId": "d81bda18-c914-4ea4-b124-2d75bc7ed470"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'/root/.cache/modelscope/hub/Jerry0/m3e-base'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 6
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Sample questions used as the documents to index.\n",
        "questions = [\n",
        "    '湖北省十四五规划有哪些功能定位?',\n",
        "    '湖北省十四五规划中功能定位有多少个?',\n",
        "    '湖北省十四五规划有哪些基本遵循?',\n",
        "    '湖北省十四五规划中基本遵循有多少个?',\n",
        "    '湖北省十四五规划有哪些重要项目?',\n",
        "]\n",
        "df = pd.DataFrame({'text': questions})\n",
        "\n",
        "# If a CSV of questions already exists, load it instead:\n",
        "# csv_file = 'data/qa_pairs.csv'\n",
        "# df = pd.read_csv(csv_file)\n",
        "\n",
        "# Wrap each row as a Document whose page_content comes from the `text` column.\n",
        "loader = DataFrameLoader(df, page_content_column=\"text\")\n",
        "data = loader.load()"
      ],
      "metadata": {
        "id": "5Px26aBwTzuw"
      },
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Inspect the Document objects produced by the loader.\n",
        "data"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3QOH1IZ4UaOC",
        "outputId": "901345b8-40f0-4672-c449-915fdfa631e7"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[Document(page_content='湖北省十四五规划有哪些功能定位?'),\n",
              " Document(page_content='湖北省十四五规划中功能定位有多少个?'),\n",
              " Document(page_content='湖北省十四五规划有哪些基本遵循?'),\n",
              " Document(page_content='湖北省十四五规划中基本遵循有多少个?'),\n",
              " Document(page_content='湖北省十四五规划有哪些重要项目?')]"
            ]
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "id": "QwvYdTjzRD3u"
      },
      "outputs": [],
      "source": [
        "# Build the vector store, or reuse the copy already saved on disk.\n",
        "# NOTE: a saved store is reused as-is; delete it if `data` has changed.\n",
        "faiss_file = 'db/guihua_kg_faiss_store.faiss'\n",
        "if not os.path.exists(faiss_file):\n",
        "    # First run: embed the documents once and persist the index.\n",
        "    db = FAISS.from_documents(data, embeddings)\n",
        "    db.save_local(faiss_file)\n",
        "else:\n",
        "    # Later runs: skip re-embedding and load the saved index instead.\n",
        "    db = FAISS.load_local(faiss_file, embeddings=embeddings)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Retrieve the 3 stored questions most similar to the query string.\n",
        "db.similarity_search('功能定位', k=3)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jot_V1qUUfx2",
        "outputId": "2899c2c0-4a23-48f5-d509-d67ce509cc01"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[Document(page_content='湖北省十四五规划中功能定位有多少个?'),\n",
              " Document(page_content='湖北省十四五规划有哪些功能定位?'),\n",
              " Document(page_content='湖北省十四五规划有哪些重要项目?')]"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "dEedL586UhO8"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}